{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "import warnings\n",
    "\n",
    "warnings.filterwarnings('ignore')\n",
    "\n",
    "import pandas as pd\n",
    "from sklearn.datasets import make_classification\n",
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "from scorecardpipeline import *\n",
    "from automl import auto_lightgbm, logger"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Synthetic binary-classification sample: 1000 rows, 30 features named f0..f29, plus the label column.\n",
    "X, y = make_classification(n_samples=1000, n_features=30, n_classes=2, random_state=328)\n",
    "data = pd.DataFrame(X, columns=[f\"f{i}\" for i in range(X.shape[1])])\n",
    "data[\"target\"] = y"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
    "def auto_logistic(data, target=\"target\", params=None, early_stopping_rounds=10, importance=1e-4, corr=0.4, psi=0.5, test_size=0.25, seed=None, max_rounds=128, mertic=\"weight\", balance_weight=0.2, C=1., class_weight=None, max_iter=128, **kwargs):\n",
    "    \"\"\"Search for a logistic regression without negative feature coefficients.\n",
    "\n",
    "    Each round runs the ``auto_lightgbm`` feature selection on a stratified\n",
    "    dev/oot split, fits ``ITLubberLogisticRegression`` on the selected\n",
    "    features and, when a feature has a negative coefficient, blacklists the\n",
    "    negative-coefficient feature with the largest p-value before retrying.\n",
    "\n",
    "    Args:\n",
    "        data: DataFrame holding the feature columns and the ``target`` column.\n",
    "        target: Name of the label column.\n",
    "        params: Passed to ``auto_lightgbm`` as ``params``; ``None`` means an empty dict.\n",
    "        early_stopping_rounds: Passed to ``auto_lightgbm``.\n",
    "        importance: Forwarded to ``train`` as ``imp_threhold``.\n",
    "        corr: Forwarded to ``train`` as ``corr_threhold``.\n",
    "        psi: Forwarded to ``train`` as ``psi_threhold``.\n",
    "        test_size: Share of ``data`` held out as the oot set.\n",
    "        seed: ``random_state`` of the stratified split.\n",
    "        max_rounds: Maximum number of select/fit rounds.\n",
    "        mertic: Forwarded to ``train`` as ``target`` (spelling kept for backward compatibility).\n",
    "        balance_weight: Forwarded to ``train`` as ``params_weight``.\n",
    "        C, class_weight, max_iter, **kwargs: Forwarded to ``ITLubberLogisticRegression``.\n",
    "\n",
    "    Returns:\n",
    "        The fitted ``ITLubberLogisticRegression`` from the first round in which\n",
    "        no feature has a negative coefficient.\n",
    "\n",
    "    Raises:\n",
    "        RuntimeError: If every feature was blacklisted or ``max_rounds`` was\n",
    "            exhausted before such a model was found.\n",
    "    \"\"\"\n",
    "    # A fresh dict per call: a shared mutable default could leak state between calls.\n",
    "    params = {} if params is None else params\n",
    "    del_vars = []\n",
    "\n",
    "    dev, oot = train_test_split(data, test_size=test_size, random_state=seed, stratify=data[target])\n",
    "\n",
    "    for _ in range(max_rounds):\n",
    "        # Every column except the target is blacklisted: nothing left to model.\n",
    "        if len(del_vars) >= len(data.columns) - 1:\n",
    "            break\n",
    "\n",
    "        lgb_base = auto_lightgbm({\"dev\": dev.drop(columns=del_vars), \"oot\": oot.drop(columns=del_vars)}, params=params, early_stopping_rounds=early_stopping_rounds)\n",
    "        _model, new_var_names = lgb_base.train(\n",
    "            select_feature=True,\n",
    "            select_type='shap',\n",
    "            single_delete=True,\n",
    "            imp_threhold=importance,\n",
    "            corr_threhold=corr,\n",
    "            psi_threhold=psi,\n",
    "            target=mertic,\n",
    "            params_weight=balance_weight,\n",
    "        )\n",
    "\n",
    "        logistic = ITLubberLogisticRegression(target=target, class_weight=class_weight, C=C, max_iter=max_iter, **kwargs)\n",
    "        # NOTE(review): the regression is fitted on the full `data`, not only on `dev` - confirm this is intended.\n",
    "        logistic.fit(data[list(new_var_names) + [target]])\n",
    "        summary = logistic.summary()\n",
    "\n",
    "        # The intercept row is not a feature: blacklisting \"const\" would make the\n",
    "        # next drop(columns=del_vars) fail on a column that does not exist.\n",
    "        negative = summary[summary[\"Coef.\"] < 0].drop(index=\"const\", errors=\"ignore\")\n",
    "        if negative.empty:\n",
    "            return logistic\n",
    "\n",
    "        # Discard the least significant of the negative-coefficient features.\n",
    "        del_vars.append(negative[\"P>|z|\"].idxmax())\n",
    "\n",
    "    # Raising a bare string is a TypeError in Python 3; use a real exception type.\n",
    "    raise RuntimeError(\"自动逻辑回归建模失败\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[ 2023-07-26 00:54:49,123 ][ INFO ][ model.py:train:75 ] 开始自动建模...\n",
      "[ 2023-07-26 00:54:49,124 ][ INFO ][ model.py:train:76 ] --------------------------------------------------\n",
      "[ 2023-07-26 00:54:49,160 ][ INFO ][ methods.py:feature_select:38 ] Shap阈值 0.0001\n",
      "[ 2023-07-26 00:54:49,161 ][ INFO ][ methods.py:feature_select:39 ] shap删除特征个数：22, shap保留特征个数：8\n",
      "[ 2023-07-26 00:54:49,161 ][ INFO ][ methods.py:feature_select:40 ] --------------------------------------------------\n",
      "[ 2023-07-26 00:54:49,183 ][ INFO ][ methods.py:feature_select:58 ] 相关性阈值: 0.4, 相关性删除特征个数: 2, 相关性保留特征个数: 6\n",
      "[ 2023-07-26 00:54:49,183 ][ INFO ][ methods.py:feature_select:59 ] --------------------------------------------------\n",
      "[ 2023-07-26 00:54:49,211 ][ INFO ][ methods.py:feature_select:73 ] PSI阈值 0.5\n",
      "[ 2023-07-26 00:54:49,211 ][ INFO ][ methods.py:feature_select:74 ] PSI删除特征个数: 0, PSI保留特征个数: 6\n",
      "[ 2023-07-26 00:54:49,212 ][ INFO ][ methods.py:feature_select:75 ] --------------------------------------------------\n",
      "[ 2023-07-26 00:54:49,212 ][ INFO ][ methods.py:auto_choose_params:133 ] 开始参数搜索,目标函数 weight\n",
      "[ 2023-07-26 00:54:49,234 ][ INFO ][ methods.py:auto_choose_params:211 ] train_number: 0, devks: 0.9278785982478097, ootks: 0.936, params: {'boosting_type': 'gbdt', 'objective': 'binary', 'metric': 'auc', 'reg_lambda': 3, 'reg_alpha': 0.85, 'num_leaves': 31, 'learning_rate': 0.02, 'min_data': 50, 'min_hessian': 0.05, 'num_threads': 1, 'feature_fraction': 0.9, 'bagging_fraction': 0.8, 'bagging_freq': 2, 'verbose': -1, 'num_boost_round': 100}\n",
      "[ 2023-07-26 00:54:49,270 ][ INFO ][ methods.py:check_params:110 ] (Good) train_number: 1, devks: 0.9465809534645581, ootks: 0.96, params: {'boosting_type': 'gbdt', 'objective': 'binary', 'metric': 'auc', 'reg_lambda': 13, 'reg_alpha': 0.85, 'num_leaves': 31, 'learning_rate': 0.02, 'min_data': 50, 'min_hessian': 0.05, 'num_threads': 1, 'feature_fraction': 0.9, 'bagging_fraction': 0.8, 'bagging_freq': 2, 'verbose': -1, 'num_boost_round': 100}\n",
      "[ 2023-07-26 00:54:50,303 ][ INFO ][ methods.py:check_params:110 ] (Good) train_number: 39, devks: 0.9572903629536921, ootks: 0.96, params: {'boosting_type': 'gbdt', 'objective': 'binary', 'metric': 'auc', 'reg_lambda': 3, 'reg_alpha': 1.85, 'num_leaves': 31, 'learning_rate': 0.01999999999999999, 'min_data': 50, 'min_hessian': 0.05000000000000002, 'num_threads': 1, 'feature_fraction': 0.9000000000000001, 'bagging_fraction': 0.9500000000000001, 'bagging_freq': 2, 'verbose': -1, 'num_boost_round': 100}\n",
      "[ 2023-07-26 00:54:51,397 ][ INFO ][ methods.py:auto_choose_params:235 ] --------------------------------------------------\n",
      "[ 2023-07-26 00:54:51,397 ][ INFO ][ methods.py:auto_choose_params:238 ] Best params: \n",
      "[ 2023-07-26 00:54:51,398 ][ INFO ][ model.py:train:113 ] --------------------------------------------------\n",
      "[ 2023-07-26 00:54:51,399 ][ INFO ][ methods.py:auto_delete_vars:246 ] 开始逐步删除特征 \t\n",
      "[ 2023-07-26 00:54:51,411 ][ INFO ][ methods.py:auto_delete_vars:270 ] train_number: 0, ootks: 0.944\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 6/6 [00:00<00:00, 88.41it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[ 2023-07-26 00:54:51,482 ][ INFO ][ methods.py:auto_delete_vars:289 ] (End) train_n: 6, ootks: 0.888 del_list_vars: []\n",
      "[ 2023-07-26 00:54:51,482 ][ INFO ][ methods.py:auto_delete_vars:296 ] 逐步删除特征个数: 0, 逐步保留特征个数: 6\n",
      "[ 2023-07-26 00:54:51,483 ][ INFO ][ model.py:train:118 ] --------------------------------------------------\n",
      "[ 2023-07-26 00:54:51,510 ][ INFO ][ model.py:train:128 ] KS & PSI: {'devks': 0.927864375924451, 'ootks': 0.944, 'ootpsi': 0.03034708155969933}\n",
      "[ 2023-07-26 00:54:51,514 ][ INFO ][ model.py:train:129 ] --------------------------------------------------\n",
      "[ 2023-07-26 00:54:51,518 ][ INFO ][ model.py:train:130 ] AutoML建模完成\n",
      "[ 2023-07-26 00:54:51,544 ][ INFO ][ model.py:train:75 ] 开始自动建模...\n",
      "[ 2023-07-26 00:54:51,545 ][ INFO ][ model.py:train:76 ] --------------------------------------------------\n",
      "[ 2023-07-26 00:54:51,573 ][ INFO ][ methods.py:feature_select:38 ] Shap阈值 0.0001\n",
      "[ 2023-07-26 00:54:51,573 ][ INFO ][ methods.py:feature_select:39 ] shap删除特征个数：26, shap保留特征个数：3\n",
      "[ 2023-07-26 00:54:51,574 ][ INFO ][ methods.py:feature_select:40 ] --------------------------------------------------\n",
      "[ 2023-07-26 00:54:51,586 ][ INFO ][ methods.py:feature_select:58 ] 相关性阈值: 0.4, 相关性删除特征个数: 1, 相关性保留特征个数: 2\n",
      "[ 2023-07-26 00:54:51,587 ][ INFO ][ methods.py:feature_select:59 ] --------------------------------------------------\n",
      "[ 2023-07-26 00:54:51,601 ][ INFO ][ methods.py:feature_select:73 ] PSI阈值 0.5\n",
      "[ 2023-07-26 00:54:51,602 ][ INFO ][ methods.py:feature_select:74 ] PSI删除特征个数: 0, PSI保留特征个数: 2\n",
      "[ 2023-07-26 00:54:51,602 ][ INFO ][ methods.py:feature_select:75 ] --------------------------------------------------\n",
      "[ 2023-07-26 00:54:51,603 ][ INFO ][ methods.py:auto_choose_params:133 ] 开始参数搜索,目标函数 weight\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[ 2023-07-26 00:54:51,624 ][ INFO ][ methods.py:auto_choose_params:211 ] train_number: 0, devks: 0.8719706451245877, ootks: 0.904, params: {'boosting_type': 'gbdt', 'objective': 'binary', 'metric': 'auc', 'reg_lambda': 3, 'reg_alpha': 0.85, 'num_leaves': 31, 'learning_rate': 0.02, 'min_data': 50, 'min_hessian': 0.05, 'num_threads': 1, 'feature_fraction': 0.9, 'bagging_fraction': 0.8, 'bagging_freq': 2, 'verbose': -1, 'num_boost_round': 100}\n",
      "[ 2023-07-26 00:54:51,655 ][ INFO ][ methods.py:check_params:110 ] (Good) train_number: 1, devks: 0.9038002048014564, ootks: 0.92, params: {'boosting_type': 'gbdt', 'objective': 'binary', 'metric': 'auc', 'reg_lambda': 13, 'reg_alpha': 0.85, 'num_leaves': 31, 'learning_rate': 0.02, 'min_data': 50, 'min_hessian': 0.05, 'num_threads': 1, 'feature_fraction': 0.9, 'bagging_fraction': 0.8, 'bagging_freq': 2, 'verbose': -1, 'num_boost_round': 100}\n",
      "[ 2023-07-26 00:54:51,795 ][ INFO ][ methods.py:check_params:110 ] (Good) train_number: 7, devks: 0.927864375924451, ootks: 0.928, params: {'boosting_type': 'gbdt', 'objective': 'binary', 'metric': 'auc', 'reg_lambda': 3, 'reg_alpha': 0.85, 'num_leaves': 31, 'learning_rate': 0.02, 'min_data': 50, 'min_hessian': 0.05, 'num_threads': 1, 'feature_fraction': 0.9, 'bagging_fraction': 0.8, 'bagging_freq': 2, 'verbose': -1, 'num_boost_round': 100}\n",
      "[ 2023-07-26 00:54:52,125 ][ INFO ][ methods.py:check_params:110 ] (Good) train_number: 21, devks: 0.9679713277961088, ootks: 0.952, params: {'boosting_type': 'gbdt', 'objective': 'binary', 'metric': 'auc', 'reg_lambda': 3, 'reg_alpha': 1.85, 'num_leaves': 31, 'learning_rate': 0.22, 'min_data': 50, 'min_hessian': 0.05, 'num_threads': 1, 'feature_fraction': 0.9, 'bagging_fraction': 0.8, 'bagging_freq': 2, 'verbose': -1, 'num_boost_round': 100}\n",
      "[ 2023-07-26 00:54:53,358 ][ INFO ][ methods.py:auto_choose_params:235 ] --------------------------------------------------\n",
      "[ 2023-07-26 00:54:53,359 ][ INFO ][ methods.py:auto_choose_params:238 ] Best params: \n",
      "[ 2023-07-26 00:54:53,359 ][ INFO ][ model.py:train:113 ] --------------------------------------------------\n",
      "[ 2023-07-26 00:54:53,359 ][ INFO ][ methods.py:auto_delete_vars:246 ] 开始逐步删除特征 \t\n",
      "[ 2023-07-26 00:54:53,372 ][ INFO ][ methods.py:auto_delete_vars:270 ] train_number: 0, ootks: 0.904\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 2/2 [00:00<00:00, 69.91it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[ 2023-07-26 00:54:53,413 ][ INFO ][ methods.py:auto_delete_vars:289 ] (End) train_n: 2, ootks: 0.888 del_list_vars: []\n",
      "[ 2023-07-26 00:54:53,414 ][ INFO ][ methods.py:auto_delete_vars:296 ] 逐步删除特征个数: 0, 逐步保留特征个数: 2\n",
      "[ 2023-07-26 00:54:53,415 ][ INFO ][ model.py:train:118 ] --------------------------------------------------\n",
      "[ 2023-07-26 00:54:53,436 ][ INFO ][ model.py:train:128 ] KS & PSI: {'devks': 0.8719706451245877, 'ootks': 0.904, 'ootpsi': 0.01525263517521588}\n",
      "[ 2023-07-26 00:54:53,436 ][ INFO ][ model.py:train:129 ] --------------------------------------------------\n",
      "[ 2023-07-26 00:54:53,437 ][ INFO ][ model.py:train:130 ] AutoML建模完成\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    }
   ],
   "source": [
    "# The fixed seed keeps the dev/oot split reproducible between runs.\n",
    "logistic = auto_logistic(\n",
    "    data,\n",
    "    target=\"target\",\n",
    "    params={},\n",
    "    early_stopping_rounds=10,\n",
    "    importance=1e-4,\n",
    "    corr=0.4,\n",
    "    psi=0.5,\n",
    "    test_size=0.25,\n",
    "    seed=348,\n",
    "    max_rounds=128,\n",
    "    mertic=\"weight\",\n",
    "    balance_weight=0.2,\n",
    "    C=10,\n",
    "    class_weight=None,\n",
    "    max_iter=128,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Coef.</th>\n",
       "      <th>Std.Err</th>\n",
       "      <th>z</th>\n",
       "      <th>P&gt;|z|</th>\n",
       "      <th>[ 0.025</th>\n",
       "      <th>0.975 ]</th>\n",
       "      <th>VIF</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>const</th>\n",
       "      <td>0.454800</td>\n",
       "      <td>0.154062</td>\n",
       "      <td>2.952053</td>\n",
       "      <td>3.156689e-03</td>\n",
       "      <td>0.152838</td>\n",
       "      <td>0.756762</td>\n",
       "      <td>1.001048</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>f8</th>\n",
       "      <td>3.788075</td>\n",
       "      <td>0.250967</td>\n",
       "      <td>15.093901</td>\n",
       "      <td>1.776236e-51</td>\n",
       "      <td>3.296179</td>\n",
       "      <td>4.279971</td>\n",
       "      <td>1.010374</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>f13</th>\n",
       "      <td>0.624334</td>\n",
       "      <td>0.116327</td>\n",
       "      <td>5.367080</td>\n",
       "      <td>8.002171e-08</td>\n",
       "      <td>0.396334</td>\n",
       "      <td>0.852334</td>\n",
       "      <td>1.010374</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          Coef.   Std.Err          z         P>|z|   [ 0.025   0.975 ]  \\\n",
       "const  0.454800  0.154062   2.952053  3.156689e-03  0.152838  0.756762   \n",
       "f8     3.788075  0.250967  15.093901  1.776236e-51  3.296179  4.279971   \n",
       "f13    0.624334  0.116327   5.367080  8.002171e-08  0.396334  0.852334   \n",
       "\n",
       "            VIF  \n",
       "const  1.001048  \n",
       "f8     1.010374  \n",
       "f13    1.010374  "
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "logistic.summary()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "score",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.8"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
